#Load in the needed packages
library(ggplot2)
library(reshape2)
library(dplyr)
library(readr)
library(plotly)
library(tidyverse)
library(ggvis)
library(viridis)
library(RColorBrewer)
library(stats)
library(reshape)
# Load all the csv files
# Every World Development Indicators table lives in the same directory, so
# the location is defined once and each file name is appended to it.
data_dir <- "~/working/data_science_folders/data_munging_EDA/group_project_2_munging/world-development-indicators"

# Country: all columns parsed as character except LatestIndustrialData,
# LatestTradeData and LatestWaterWithdrawalData, which were parsed as double.
Country <- read_csv(file.path(data_dir, "Country.csv"))

# CountryNotes: Countrycode, Seriescode, Description (all character).
CountryNotes <- read_csv(file.path(data_dir, "CountryNotes.csv"))

# Footnotes: Countrycode, Seriescode, Year, Description (all character;
# note that Year is character here, not numeric).
Footnotes <- read_csv(file.path(data_dir, "Footnotes.csv"))

# Indicators: CountryName, CountryCode, IndicatorName, IndicatorCode
# (character) plus Year and Value (double). This is the long-format table
# used by the rest of the analysis.
Indicators <- read_csv(file.path(data_dir, "Indicators.csv"))

# Series: read with comment = "#", so readr discards everything after a "#".
# All columns parsed as character except OtherNotes, OtherWebLinks and
# RelatedIndicators, which were parsed as logical.
# NOTE(review): logical was presumably guessed because those columns are
# empty in the rows readr inspected -- confirm before relying on them.
Series <- read_csv(file.path(data_dir, "Series.csv"),
                   comment = "#")

# SeriesNotes: Seriescode, Year, Description (all character).
SeriesNotes <- read_csv(file.path(data_dir, "SeriesNotes.csv"))
# Now that all our data is loaded, the next step is to list all of the different indicators present in this dataset.
# Make the country name a factor
Indicators$CountryName <- as.factor(Indicators$CountryName)

# Get the counts: one row per indicator with the number of distinct countries
# and years it covers and the first and last year it appears in.
counts <- Indicators %>%
  group_by(IndicatorCode, IndicatorName) %>%
  summarise(NumCountries = n_distinct(CountryName),
            NumYears = n_distinct(Year),
            FirstYear = min(Year),
            LastYear = max(Year)) %>%
  # summarise() only peels off the last grouping variable; drop the rest so
  # later verbs on `counts` do not silently run per IndicatorCode.
  ungroup()

# Spell out the dollar sign in indicator names. fixed = TRUE matches a
# literal "$", so no regex escaping is needed.
counts$IndicatorName <- gsub("$", "dollar", counts$IndicatorName, fixed = TRUE)

# View() opens the data viewer, which only makes sense in an interactive
# session; the original non-interactive run emitted a system2() warning at
# this point.
if (interactive()) {
  View(counts)
}
# Create BRICS indicator: the five country codes and the rows of Indicators
# that belong to them.
brics <- c("BRA", "RUS", "IND", "CHN", "ZAF")
brics.Indicators <- subset(Indicators, CountryCode %in% brics)

# Line chart of one indicator over time, one coloured line per country code.
#   series  - rows of brics.Indicators for a single IndicatorCode
#   title   - plot title
#   y_label - y-axis label
# Returns the ggplot object; calling it at top level prints the chart.
# theme_classic() is a complete theme, so the theme_bw(base_size = 12,
# base_family = "Helvetica") call that used to precede it never had any
# effect and has been dropped. Pass base_size / base_family to
# theme_classic() if that styling is actually wanted.
plot_brics_series <- function(series, title, y_label) {
  ggplot(data = series, aes(Year, Value)) +
    geom_line(aes(color = CountryCode), size = 0.85) +
    scale_x_continuous(breaks = seq(1960, 2014, 5)) +
    theme_classic() +
    labs(title = title, x = "Year", y = y_label)
}

# Create the GDP growth (annual %) and plot the variable
GDP_growth_annual_percent <- subset(brics.Indicators, IndicatorCode == "NY.GDP.MKTP.KD.ZG")
plot_brics_series(GDP_growth_annual_percent,
                  title = "GDP growth increased for most BRICS, especially China",
                  y_label = "Annual Percent Growth")

# Create the GDP per capita variable (current US dollar) and plot it
GDP_per_capita_current_US <- subset(brics.Indicators, IndicatorCode == "NY.GDP.PCAP.CD")
plot_brics_series(GDP_per_capita_current_US,
                  title = "GDP per capita has risen sharply for most countries since the early 2000s",
                  y_label = "Current US Dollar")

# Create the GDP per capita, PPP (current international $) and plot it
GDP_per_capita_PPP_international <- subset(brics.Indicators, IndicatorCode == "NY.GDP.PCAP.PP.CD")
plot_brics_series(GDP_per_capita_PPP_international,
                  title = "GDP per capita, converted through PPP shows Russia is ahead",
                  y_label = "Current International Dollar")

# Check for adjusted net national income and plot it
adjusted_net_national_income <- subset(brics.Indicators, IndicatorCode == "NY.ADJ.NNTY.CD")
plot_brics_series(adjusted_net_national_income,
                  title = "Adjusted Net National Income has risen sharply for China",
                  y_label = "Current US dollar")
# Poverty indicators for the BRICS countries.
# Each of the four indicators below gets the same three charts:
#   1. a scatter of the indicator against GDP per capita for 1993,
#   2. a box plot of all available values per country,
#   3. a violin plot of the same values.
# The shared recipes live in the three helpers that follow.

# Scatter of one poverty indicator against GDP per capita for a single year.
#   indicators   - long-format indicator rows (CountryName, CountryCode,
#                  IndicatorCode, Year, Value)
#   poverty_code - IndicatorCode of the poverty measure (x axis)
#   x_label      - x-axis label
#   title        - plot title
#   x_max        - upper x-axis limit (the lower limit is 0)
#   year         - year to compare; also shown in the caption
# Returns the ggplot object.
# The GDP rows drive the join, so a country appears only if it has a GDP per
# capita value for `year`; a missing poverty value becomes NA and is dropped
# by geom_point().
plot_poverty_vs_gdp <- function(indicators, poverty_code, x_label, title,
                                x_max, year = 1993) {
  gdp <- indicators %>%
    dplyr::filter(IndicatorCode == "NY.GDP.PCAP.CD", Year == year) %>%
    select(CountryName, CountryCode, NY.GDP.PCAP.CD = Value)
  poverty <- indicators %>%
    dplyr::filter(IndicatorCode == poverty_code, Year == year) %>%
    select(CountryCode, poverty_value = Value)
  joined <- left_join(gdp, poverty, by = "CountryCode")

  ggplot(data = joined,
         aes(x = poverty_value,
             y = NY.GDP.PCAP.CD,
             color = CountryName)) +
    geom_point(size = 3) +
    xlim(0, x_max) +
    labs(y = "GDP per capita",
         x = x_label,
         title = title,
         caption = paste("Data from", year)) +
    theme_classic()
}

# Box plot of every available value of one indicator, by country.
plot_poverty_boxplot <- function(series) {
  ggplot(data = series, mapping = aes(x = CountryName, y = Value)) +
    geom_boxplot()
}

# Violin plot of every available value of one indicator, by country.
#   series  - rows of brics.Indicators for a single poverty indicator
#   y_label - y-axis label
plot_poverty_violin <- function(series, y_label) {
  ggplot(data = series, aes(x = CountryName, y = Value, fill = CountryName)) +
    geom_violin() +
    scale_fill_viridis(discrete = TRUE, alpha = 0.7, option = "A") +
    theme(
      legend.position = "none",
      plot.title = element_text(size = 11)
    ) +
    ggtitle("Violin chart showing how BRICS countries vary across poverty values") +
    xlab("BRICS Countries") +
    ylab(y_label)
}

# Check for Poverty gap at $1.90 a day (2011 PPP) (%)
plot_poverty_vs_gdp(brics.Indicators, "SI.POV.GAPS",
                    x_label = "Poverty gap at $1.90 a day (2011 PPP) (%)",
                    title = "Higher per capita GDP is associated with a smaller poverty gap",
                    x_max = 50)
poverty_gap <- subset(brics.Indicators, IndicatorCode == "SI.POV.GAPS")
plot_poverty_boxplot(poverty_gap)
plot_poverty_violin(poverty_gap, "Poverty gap at $1.90 a day (2011 PPP) (%)")

# Check for Poverty gap at $3.10 a day (2011 PPP) (%)
plot_poverty_vs_gdp(brics.Indicators, "SI.POV.GAP2",
                    x_label = "Poverty gap at $3.10 a day (2011 PPP) (%)",
                    title = "Higher per capita GDP is associated with a smaller poverty gap",
                    x_max = 50)
poverty_gap <- subset(brics.Indicators, IndicatorCode == "SI.POV.GAP2")
plot_poverty_boxplot(poverty_gap)
plot_poverty_violin(poverty_gap, "Poverty gap at $3.10 a day (2011 PPP) (%)")

# Check for Poverty headcount ratio at $1.90 a day
plot_poverty_vs_gdp(brics.Indicators, "SI.POV.DDAY",
                    x_label = "Poverty headcount ratio at $1.90 a day (2011 PPP) (% of population)",
                    title = "Higher per capita GDP is associated with a smaller poverty headcount",
                    x_max = 100)
poverty_gap <- subset(brics.Indicators, IndicatorCode == "SI.POV.DDAY")
plot_poverty_boxplot(poverty_gap)
plot_poverty_violin(poverty_gap, "Poverty headcount ratio at $1.90 a day (2011 PPP) (% of population)")

# Check for Poverty headcount ratio at $3.10 a day
plot_poverty_vs_gdp(brics.Indicators, "SI.POV.2DAY",
                    x_label = "Poverty headcount ratio at $3.10 a day (2011 PPP) (% of population)",
                    title = "Higher per capita GDP is associated with a smaller poverty headcount",
                    x_max = 100)
poverty_gap <- subset(brics.Indicators, IndicatorCode == "SI.POV.2DAY")
plot_poverty_boxplot(poverty_gap)
plot_poverty_violin(poverty_gap, "Poverty headcount ratio at $3.10 a day (2011 PPP) (% of population)")
# Check out Agricultural land (% of land area) against GDP per capita.
# The two indicators are pulled straight from the long table by
# IndicatorCode and joined on country and year; no pivot is needed.
# ap_a: GDP per capita rows. They drive the join, so the result has one row
# per country-year that has a GDP per capita value.
ap_a <- brics.Indicators %>%
  dplyr::filter(IndicatorCode == "NY.GDP.PCAP.CD") %>%
  select(CountryName, CountryCode, Year, NY.GDP.PCAP.CD = Value)
# ap_b: agricultural land share, keyed by country code and year.
ap_b <- brics.Indicators %>%
  dplyr::filter(IndicatorCode == "AG.LND.AGRI.ZS") %>%
  select(CountryCode, AG.LND.AGRI.ZS = Value, Year)
agriculture_percent <- left_join(ap_a, ap_b, by = c("CountryCode", "Year"))

# Rows that mark where each path starts: 1961 for any country, or 1992 for
# the Russian Federation. The parentheses spell out the grouping that `&`
# binding tighter than `|` already gave.
start <- filter(agriculture_percent,
                Year == 1961 |
                  (Year == 1992 & CountryName == "Russian Federation"))

# geom_path() connects the points in row order within each country.
# The starting-point layer inherits the x / y / colour mapping.
ggplot(data = agriculture_percent,
       aes(x = AG.LND.AGRI.ZS,
           y = NY.GDP.PCAP.CD,
           color = CountryName)) +
  geom_path() +
  geom_point(data = start) +
  xlim(0, 100) +
  labs(y = "GDP per capita",
       x = "Agricultural land (% of land area)",
       title = "Changes in agricultural holdings seem unrelated to changes in GDP per capita",
       caption = "Dot indicates first year of data.") +
  theme_classic()
# The original run reported 12 rows with missing values removed by geom_path.
# Check out Agricultural land (% of land area) against population growth
# (annual %). Both indicators are pulled from the long table by
# IndicatorCode and joined on country and year.
# agp_a: population growth rows. They drive the join.
agp_a <- brics.Indicators %>%
  dplyr::filter(IndicatorCode == "SP.POP.GROW") %>%
  select(CountryName, CountryCode, Year, SP.POP.GROW = Value)
# agp_b: agricultural land share, keyed by country code and year.
agp_b <- brics.Indicators %>%
  dplyr::filter(IndicatorCode == "AG.LND.AGRI.ZS") %>%
  select(CountryCode, AG.LND.AGRI.ZS = Value, Year)
agriculture_population <- left_join(agp_a, agp_b, by = c("CountryCode", "Year"))

# Rows that mark where each path starts: 1961 for any country, or 1992 for
# the Russian Federation.
start <- filter(agriculture_population,
                Year == 1961 |
                  (Year == 1992 & CountryName == "Russian Federation"))

# geom_path() connects the points in row order within each country.
# The starting-point layer inherits the x / y / colour mapping.
ggplot(data = agriculture_population,
       aes(x = AG.LND.AGRI.ZS,
           y = SP.POP.GROW,
           color = CountryName)) +
  geom_path() +
  geom_point(data = start) +
  xlim(0, 100) +
  labs(y = "Population growth (annual %)",
       x = "Agricultural land (% of land area)",
       title = "No discernible relationship between population growth and agricultural land over time",
       caption = "Dot indicates first year of data.") +
  theme_classic()
# The original run reported 41 rows with missing values removed by geom_path
# and 1 removed by geom_point.
# Check the Agriculture, value added (% of GDP) against GDP per capita.
# Both indicators are pulled from the long table by IndicatorCode and joined
# on country and year.
# (`agrilcuture_value` is misspelt but kept, because the name is a global.)
# av_a: GDP per capita rows. They drive the join.
av_a <- brics.Indicators %>%
  dplyr::filter(IndicatorCode == "NY.GDP.PCAP.CD") %>%
  select(CountryName, CountryCode, Year, NY.GDP.PCAP.CD = Value)
# av_b: agriculture value added, keyed by country code and year.
av_b <- brics.Indicators %>%
  dplyr::filter(IndicatorCode == "NV.AGR.TOTL.ZS") %>%
  select(CountryCode, NV.AGR.TOTL.ZS = Value, Year)
agrilcuture_value <- left_join(av_a, av_b, by = c("CountryCode", "Year"))

# Rows that mark where each path starts: 1961 for any country, or 1992 for
# the Russian Federation.
start <- filter(agrilcuture_value,
                Year == 1961 |
                  (Year == 1992 & CountryName == "Russian Federation"))

# One panel per country; geom_path() connects the points in row order.
# The starting-point layer inherits the x / y mapping.
ggplot(data = agrilcuture_value,
       aes(x = NV.AGR.TOTL.ZS,
           y = NY.GDP.PCAP.CD)) +
  geom_path() +
  geom_point(data = start) +
  facet_grid(. ~ CountryName) +
  xlim(0, 50) +
  labs(y = "GDP per capita",
       x = "Agriculture, value added (% of GDP)",
       title = "Overall contribution from agriculture doesn't seem to impact increases in GDP",
       caption = "Dot indicates first year of data.") +
  theme_classic()
library(plotly)

# World map of Agriculture, value added (% of GDP) for the BRICS countries.

# Every country code present in the full indicator table, one row each, so
# that non-BRICS countries still get an entry on the map.
all_countries <- Indicators %>%
  select(CountryCode) %>%
  group_by(CountryCode) %>%
  summarise()

# Check the Agriculture, value added (% of GDP)
# av_a: GDP per capita rows (they drive the join); av_b: agriculture value
# added. Both are taken from the long table by IndicatorCode.
av_a <- brics.Indicators %>%
  dplyr::filter(IndicatorCode == "NY.GDP.PCAP.CD") %>%
  select(CountryName, CountryCode, Year, NY.GDP.PCAP.CD = Value)
av_b <- brics.Indicators %>%
  dplyr::filter(IndicatorCode == "NV.AGR.TOTL.ZS") %>%
  select(CountryCode, NV.AGR.TOTL.ZS = Value, Year)
agrilcuture_value <- left_join(av_a, av_b, by = c("CountryCode", "Year"))
# Non-BRICS codes come through with NA in every other column.
agrilcuture_value <- left_join(all_countries, agrilcuture_value, by = "CountryCode")

# A choropleth shows one value per location, but agrilcuture_value holds one
# row per country-year. Keep a single row per country: the most recent year
# with an observed agriculture value (rows with a value sort first, then the
# latest year). Countries with no value at all keep their single NA row.
# NOTE(review): "latest observed year" is a deliberate choice; confirm it is
# the year the map is meant to show.
agri_map_data <- agrilcuture_value %>%
  group_by(CountryCode) %>%
  dplyr::arrange(is.na(NV.AGR.TOTL.ZS), desc(Year), .by_group = TRUE) %>%
  slice(1) %>%
  ungroup()

# Missing values are drawn as 0 so that every country is coloured.
agri_map_data$NV.AGR.TOTL.ZS <- replace_na(agri_map_data$NV.AGR.TOTL.ZS, 0)
agrilcuture_value$NV.AGR.TOTL.ZS <- replace_na(agrilcuture_value$NV.AGR.TOTL.ZS, 0)

# light grey boundaries
l <- list(color = toRGB("grey"), width = 0.5)
# specify map projection/options
g <- list(
  showframe = FALSE,
  showcoastlines = FALSE,
  projection = list(type = 'Albers')
)
p <- plot_geo(agri_map_data) %>%
  add_trace(
    z = ~NV.AGR.TOTL.ZS,
    color = ~NV.AGR.TOTL.ZS,
    colors = 'Blues',
    text = ~CountryName,
    locations = ~CountryCode,
    marker = list(line = l)
  ) %>%
  colorbar(title = 'Agriculture, value added (% of GDP)') %>%
  layout(
    title = '',
    geo = g
  )
p
# Under-5 mortality rate for the BRICS countries.
mortality_rate_under_5 <- subset(brics.Indicators, IndicatorCode == "SH.DYN.MORT")

# Trend over time, one line per country code.
# theme_classic() is a complete theme, so the theme_bw(base_size = 12,
# base_family = "Helvetica") call that used to precede it had no effect and
# has been dropped.
ggplot(data = mortality_rate_under_5, aes(Year, Value)) +
  geom_line(aes(color = CountryCode), size = 0.85) +
  scale_x_continuous(breaks = seq(1960, 2014, 5)) +
  theme_classic() +
  labs(title = "Mortality rate for BRICS countries steadily decreases",
       y = "Mortality rate, under-5")

# subsetting for BRICS countries
# NOTE: this rebinds `brics` from the vector of country codes to a data frame
# holding every indicator row for the five countries.
brics <- Indicators %>%
  filter(CountryName %in% c("Brazil", "India", "South Africa",
                            "Russian Federation", "China"))
#View(brics)

# Distribution of all available values per country.
ggplot(data = mortality_rate_under_5, mapping = aes(x = CountryName, y = Value)) +
  geom_boxplot()

ggplot(data = mortality_rate_under_5,
       aes(x = CountryName, y = Value, fill = CountryName)) +
  geom_violin() +
  scale_fill_viridis(discrete = TRUE, alpha = 0.7, option = "A") +
  theme(
    legend.position = "none",
    plot.title = element_text(size = 11)
  ) +
  ggtitle("Distribution of mortality rates highlights differences in each countries path to lower infant mortality") +
  xlab("BRICS Countries")
# Check out Mortality rate, under-5 against population growth (annual %).
# Both indicators are pulled from the long table by IndicatorCode and joined
# on country and year.
# m_a: under-5 mortality rows. They drive the join.
m_a <- brics.Indicators %>%
  dplyr::filter(IndicatorCode == "SH.DYN.MORT") %>%
  select(CountryName, CountryCode, Year, SH.DYN.MORT = Value)
# m_b: population growth, keyed by country code and year.
m_b <- brics.Indicators %>%
  dplyr::filter(IndicatorCode == "SP.POP.GROW") %>%
  select(CountryCode, SP.POP.GROW = Value, Year)
population_mortality <- left_join(m_a, m_b, by = c("CountryCode", "Year"))

# Rows that mark where each path starts: 1960 for any country, otherwise a
# country-specific year. The parentheses spell out the grouping that `&`
# binding tighter than `|` already gave.
start <- filter(population_mortality,
                Year == 1960 |
                  (Year == 1970 & CountryName == "Russian Federation") |
                  (Year == 1969 & CountryName == "China") |
                  (Year == 1974 & CountryName == "South Africa"))

# geom_path() connects the points in row order within each country.
# The starting-point layer inherits the x / y / colour mapping.
# The title is set once here; the earlier code set title = "" and then
# overrode it with ggtitle().
ggplot(data = population_mortality,
       aes(x = SH.DYN.MORT,
           y = SP.POP.GROW,
           color = CountryName)) +
  geom_path() +
  geom_point(data = start) +
  labs(y = "Population growth (annual %)",
       x = "Mortality rate, under-5",
       title = "Mortality rate decrease also shows a decrease in the population growth as well",
       caption = "Dot indicates first year of data.") +
  theme_classic()
# The original run reported 5 rows with missing values removed by geom_path.
# Unemployment rate for the BRICS countries.
unemployment_rate <- subset(brics.Indicators, IndicatorCode == "SL.UEM.TOTL.ZS")

# Trend over time, one line per country code.
# theme_classic() is a complete theme, so the theme_bw(base_size = 12,
# base_family = "Helvetica") call that used to precede it had no effect and
# has been dropped.
ggplot(data = unemployment_rate, aes(Year, Value)) +
  geom_line(aes(color = CountryCode), size = 0.85) +
  scale_x_continuous(breaks = seq(1960, 2014, 5)) +
  theme_classic() +
  labs(title = "Unemployment remains low for the most part other than for South Africa",
       y = "Unemployment,total (% of total labor force) (modeled ILO estimate)")

# Distribution of all available values per country.
ggplot(data = unemployment_rate, mapping = aes(x = CountryName, y = Value)) +
  geom_boxplot()

ggplot(data = unemployment_rate,
       aes(x = CountryName, y = Value, fill = CountryName)) +
  geom_violin() +
  scale_fill_viridis(discrete = TRUE, alpha = 0.7, option = "A") +
  theme(
    legend.position = "none",
    plot.title = element_text(size = 11)
  ) +
  ggtitle("Unemployment,total (% of total labor force) (modeled ILO estimate)") +
  xlab("BRICS Countries")
# Check out education spending against unemployment.
# Both indicators are pulled from the long table by IndicatorCode and joined
# on country and year.
# pm_a: government expenditure on education rows. They drive the join.
pm_a <- brics.Indicators %>%
  dplyr::filter(IndicatorCode == "SE.XPD.TOTL.GD.ZS") %>%
  select(CountryName, CountryCode, Year, SE.XPD.TOTL.GD.ZS = Value)
# pm_b: unemployment rate, keyed by country code and year.
pm_b <- brics.Indicators %>%
  dplyr::filter(IndicatorCode == "SL.UEM.TOTL.ZS") %>%
  select(CountryCode, SL.UEM.TOTL.ZS = Value, Year)
unemployment_education <- left_join(pm_a, pm_b, by = c("CountryCode", "Year"))

# Rows that mark where each path starts: 1991 for any country, otherwise a
# country-specific year. The parentheses spell out the grouping that `&`
# binding tighter than `|` already gave.
start <- filter(unemployment_education,
                Year == 1991 |
                  (Year == 2000 & CountryName == "Russian Federation") |
                  (Year == 1992 & CountryName == "China") |
                  (Year == 1995 & CountryName == "Brazil") |
                  (Year == 1997 & CountryName == "India"))

# x is unemployment and y is education spending. The axis labels used to be
# the other way round, so each axis was labelled with the wrong indicator.
# The title is left empty, as before.
ggplot(data = unemployment_education,
       aes(x = SL.UEM.TOTL.ZS,
           y = SE.XPD.TOTL.GD.ZS,
           color = CountryName)) +
  geom_path() +
  geom_point(data = start) +
  labs(x = "Unemployment,total (% of total labor force) (modeled ILO estimate)",
       y = "Government expenditure on education, total (% of GDP)",
       title = "",
       caption = "Dot indicates first year of data.") +
  theme_classic()
# The original run reported 22 rows with missing values removed by geom_path.
# Annual population growth for the BRICS countries.
population_growth_rate <- subset(brics.Indicators, IndicatorCode == "SP.POP.GROW")

# Trend over time, one line per country code.
# theme_classic() is a complete theme, so the theme_bw(base_size = 12,
# base_family = "Helvetica") call that used to precede it had no effect and
# has been dropped.
ggplot(data = population_growth_rate, aes(Year, Value)) +
  geom_line(aes(color = CountryCode), size = 0.85) +
  scale_x_continuous(breaks = seq(1960, 2014, 5)) +
  theme_classic() +
  labs(title = "All BRICS countries steadily decrease their annual growth rates",
       y = "Population growth (annual %)")

# Distribution of all available values per country.
ggplot(data = population_growth_rate, mapping = aes(x = CountryName, y = Value)) +
  geom_boxplot()

ggplot(data = population_growth_rate,
       aes(x = CountryName, y = Value, fill = CountryName)) +
  geom_violin() +
  scale_fill_viridis(discrete = TRUE, alpha = 0.7, option = "A") +
  theme(
    legend.position = "none",
    plot.title = element_text(size = 11)
  ) +
  ggtitle("Population growth (annual %)") +
  xlab("BRICS Countries")
# Government expenditure on education (% of GDP) for the BRICS countries.
govt_exp_edu <- subset(brics.Indicators, IndicatorCode == "SE.XPD.TOTL.GD.ZS")

# Trend over time, one line per country code.
# theme_classic() is a complete theme, so the theme_bw(base_size = 12,
# base_family = "Helvetica") call that used to precede it had no effect and
# has been dropped.
ggplot(data = govt_exp_edu, aes(Year, Value)) +
  geom_line(aes(color = CountryCode), size = 0.85) +
  scale_x_continuous(breaks = seq(1960, 2014, 5)) +
  theme_classic() +
  labs(title = "Education spending differs substantially across countries and over time",
       y = "Government expenditure on education, total (% of GDP)")

# Distribution of all available values per country.
ggplot(data = govt_exp_edu, mapping = aes(x = CountryName, y = Value)) +
  geom_boxplot()

# The violin title used to read "Population growth (annual %)", copied from
# the population charts; it now names the indicator that is actually drawn.
ggplot(data = govt_exp_edu,
       aes(x = CountryName, y = Value, fill = CountryName)) +
  geom_violin() +
  scale_fill_viridis(discrete = TRUE, alpha = 0.7, option = "A") +
  theme(
    legend.position = "none",
    plot.title = element_text(size = 11)
  ) +
  ggtitle("Government expenditure on education, total (% of GDP)") +
  xlab("BRICS Countries")
# All indicator rows for the five BRICS countries, selected by country name.
brics_names <- c("Brazil", "India", "South Africa", "Russian Federation", "China")
brics <- Indicators %>%
  filter(CountryName %in% brics_names)

# Filter four different expenditures as % of GDP
expenditure_names <- c("Military expenditure (% of GDP)",
                       "Research and development expenditure (% of GDP)",
                       "Health expenditure, total (% of GDP)",
                       "Government expenditure on education as % of GDP (%)")
expenditures <- brics %>%
  filter(IndicatorName %in% expenditure_names)
#View(expenditures)

# Stacked bars per country, filled by expenditure type. Country names are
# wrapped at 10 characters so the axis labels fit.
# NOTE(review): `expenditures` holds one row per year, so each bar segment is
# the sum of that indicator's values over all available years -- confirm this
# is intended rather than a per-year or average figure.
ggplot(data = expenditures,
       aes(x = stringr::str_wrap(CountryName, 10),
           y = Value,
           fill = IndicatorName)) +
  geom_col() +
  xlab("BRICS Countries")
# Arms trade and reserves of the BRICS countries, years after 1991.
BRICS <- c("BRA", "CHN", "IND", "RUS", "ZAF")

# Indicator subsets for all countries, restricted to years after 1991.
military_exports <- subset(Indicators, IndicatorCode == "MS.MIL.XPRT.KD" & Year > 1991)
military_imports <- subset(Indicators, IndicatorCode == "MS.MIL.MPRT.KD" & Year > 1991)
total_reserves <- subset(Indicators, IndicatorCode == "FI.RES.TOTL.CD" & Year > 1991)
total_reserves_without_gold <- subset(Indicators, IndicatorCode == "FI.RES.XGLD.CD" & Year > 1991)
# Not used by any plot in this section; kept because it is a global.
brics_uem_indicators <- subset(Indicators, IndicatorCode == "SL.UEM.TOTL.ZS" & Year > 1991)

# Line chart of one indicator over time for a set of countries, one coloured
# line per country name, with the bold text / legend styling shared by all
# four charts below.
#   series  - indicator rows (CountryCode, CountryName, Year, Value)
#   y_label - y-axis label
#   title   - plot title
#   codes   - country codes to keep (defaults to the BRICS codes)
# Returns the ggplot object.
plot_brics_lines <- function(series, y_label, title, codes = BRICS) {
  kept <- series[series$CountryCode %in% codes, ]
  ggplot(data = kept, aes(x = Year, y = Value, col = CountryName)) +
    geom_line(size = 1) +
    ylab(y_label) +
    ggtitle(title) +
    theme(text = element_text(colour = "black", face = "bold", size = 12),
          legend.title = element_text(colour = "blue", size = 16, face = "bold"),
          legend.text = element_text(colour = "black", size = 10, face = "bold",
                                     family = "Times New Roman"),
          legend.background = element_rect(fill = "gray90", size = .5,
                                           linetype = "dotted"))
}

#VALUE OF ARMS EXPORTS OF BRICS COUNTRIES
plot1 <- plot_brics_lines(military_exports, "Exports",
                          "ARMS EXPORTS OF BRICS COUNTRIES OVER THE YEARS")
plot1

#VALUE OF ARMS IMPORTS OF BRICS COUNTRIES
plot2 <- plot_brics_lines(military_imports, "Imports",
                          "ARMS IMPORTS OF BRICS COUNTRIES OVER THE YEARS")
plot2

#TOTAL RESERVES OF BRICS COUNTRIES
plot3 <- plot_brics_lines(total_reserves, "Total Reserves",
                          "TOTAL RESERVES OF BRICS COUNTRIES OVER THE YEARS")
plot3

#TOTAL RESERVES MINUS GOLD OF BRICS COUNTRIES
plot4 <- plot_brics_lines(total_reserves_without_gold, "Total Reserves",
                          "TOTAL RESERVES MINUS GOLD OF BRICS COUNTRIES OVER THE YEARS")
plot4
# Correlations between seven headline indicators across all BRICS
# country-years.

# Indicator codes that go into the correlation matrix.
corr_codes <- c("SL.UEM.TOTL.ZS", "SH.DYN.MORT", "SP.POP.GROW",
                "SE.XPD.TOTL.GD.ZS", "NY.GDP.PCAP.CD", "SI.POV.GAPS",
                "AG.LND.AGRI.ZS")

# Short column names in the order the matrix is drawn. This is the
# alphabetical order of the full indicator names, which is the order the
# previous spread() call produced.
corr_columns <- c("agricul.land", "GDP.per.cap", "gov.exp.edu", "mort.rate",
                  "pop.growth", "pov.gap", "unemployment")

# One row per country-year with one column per indicator. pivot_wider()
# replaces the superseded spread(); the select() fixes the column order
# explicitly. dplyr::rename is namespaced because reshape, attached later,
# also exports rename().
all_in_all <- brics %>%
  dplyr::filter(IndicatorCode %in% corr_codes) %>%
  select(CountryName, IndicatorName, Year, Value) %>%
  pivot_wider(names_from = "IndicatorName", values_from = "Value") %>%
  dplyr::rename(
    agricul.land = `Agricultural land (% of land area)`,
    unemployment = `Unemployment, total (% of total labor force)`,
    pov.gap = `Poverty gap at $1.90 a day (2011 PPP) (%)`,
    pop.growth = `Population growth (annual %)`,
    mort.rate = `Mortality rate, under-5 (per 1,000)`,
    gov.exp.edu = `Government expenditure on education as % of GDP (%)`,
    GDP.per.cap = `GDP per capita (current US$)`
  ) %>%
  select(CountryName, Year, agricul.land, GDP.per.cap, gov.exp.edu,
         mort.rate, pop.growth, pov.gap, unemployment) %>%
  dplyr::arrange(CountryName, Year)
#View(all_in_all)

# Indicator columns only, picked by name rather than by position so the
# selection cannot drift if columns are added or reordered.
new <- all_in_all[, corr_columns]

library(ggcorrplot)
# use = "complete.obs" keeps only the country-years in which all seven
# indicators are present.
corr <- cor(new, use = "complete.obs")
ggcorrplot(corr, method = "circle")